using System;
using System.Runtime.InteropServices;
using DynaPDF;

namespace text_coordinates
{
   public class CTextCoordinates
   {
      // Snapshot of the graphics state values this class tracks. Instances are copied
      // onto CStack by SaveGState() and copied back by RestoreGState().
      protected struct TGState
      {
         // Font handle set by SetFont(); passed to CPDF.GetTextWidth() when text runs are measured.
         public IntPtr    ActiveFont;
         // Character spacing set by SetCharSpacing(); passed to CPDF.GetTextWidth().
         public float     CharSpacing;
         // Font size set by SetFont(); stored only, it is not read anywhere in this class.
         public double    FontSize;
         // Font type set by SetFont(); MarkCoordinates() takes the CID code path when it is ftType0.
         public TFontType FontType;
         // Current transformation matrix; templates and MulMatrix() concatenate onto it.
         public TCTM      Matrix;
         // Text draw mode set by SetTextDrawMode(); stored only, it is not read anywhere in this class.
         public TDrawMode TextDrawMode;
         // Text scaling set by SetTextScale(); initialized to 100.0f and passed to CPDF.GetTextWidth().
         public float     TextScale;
         // Word spacing set by SetWordSpacing(); passed to CPDF.GetTextWidth().
         public float     WordSpacing;
      }
      // Growable last-in/first-out store for TGState snapshots. The backing array is
      // enlarged in fixed steps and is never shrunk.
      protected class CStack
      {
         // Number of slots added whenever the backing array is full.
         private const uint GROW_BY = 16;

         // Pops the most recently saved state into F.
         // Returns false, leaving F untouched, when nothing is stored.
         public bool Restore(ref TGState F)
         {
            if (m_Count == 0) return false;
            F = m_Items[--m_Count];
            return true;
         }

         // Pushes a copy of F.
         // Returns 0 on success and -1 when the backing array could not be enlarged.
         public int Save(ref TGState F)
         {
            if (m_Count == m_Capacity && !Grow()) return -1;
            m_Items[m_Count++] = F;
            return 0;
         }

         // Enlarges the backing array by GROW_BY slots and keeps the stored items.
         // On any failure the stack is left exactly as it was and false is returned.
         private bool Grow()
         {
            uint enlarged = m_Capacity + GROW_BY;
            try
            {
               TGState[] replacement = new TGState[enlarged];
               if (m_Items != null) m_Items.CopyTo(replacement, 0);
               m_Items = replacement;
            }
            catch
            {
               return false;
            }
            m_Capacity = enlarged;
            return true;
         }

         private uint      m_Capacity;
         private uint      m_Count;
         private TGState[] m_Items;
      }

      // Creates the object with default graphics state values, an identity matrix,
      // and an empty save stack. PDFInst is stored and used later for drawing and measuring.
      internal CTextCoordinates(CPDF PDFInst)
      {
         m_PDF   = PDFInst;
         m_Stack = new CStack();

         // Identity matrix
         TCTM identity;
         identity.a = 1.0;
         identity.b = 0.0;
         identity.c = 0.0;
         identity.d = 1.0;
         identity.x = 0.0;
         identity.y = 0.0;

         TGState initial;
         initial.ActiveFont   = IntPtr.Zero;
         initial.CharSpacing  = 0.0f;
         initial.FontSize     = 1.0;
         initial.FontType     = TFontType.ftType1;
         initial.Matrix       = identity;
         initial.TextDrawMode = TDrawMode.dmNormal;
         initial.TextScale    = 100.0f;
         initial.WordSpacing  = 0.0f;

         m_GState = initial;
      }

      // Saves the current graphics state and, if a template matrix is supplied,
      // concatenates it with the current transformation matrix.
      //
      // BBox   - not used by this implementation.
      // Matrix - pointer to a TCTM structure, or IntPtr.Zero if the template has no matrix.
      //
      // Returns 0 on success and -1 if the graphics state could not be saved.
      public int BeginTemplate(TPDFRect BBox, IntPtr Matrix)
      {
         if (SaveGState() < 0) return -1;

         // Nothing to concatenate without a matrix
         if (Matrix == IntPtr.Zero) return 0;

         TCTM templateMatrix = (TCTM)Marshal.PtrToStructure(Matrix, typeof(TCTM));
         m_GState.Matrix = MulMatrix(m_GState.Matrix, templateMatrix);
         return 0;
      }

      // Counterpart of BeginTemplate(): restores the graphics state that was saved there.
      // The result of RestoreGState() is ignored, so an empty stack is not reported.
      public void EndTemplate()
      {
         RestoreGState();
      }

      // Brings the object back to the state it has right after construction: the save stack
      // is emptied, the record counter is cleared, and the graphics state gets its defaults.
      public void Init()
      {
         // Pop until the save stack reports that it is empty
         while (RestoreGState()) { }

         // Identity matrix
         TCTM identity;
         identity.a = 1.0;
         identity.b = 0.0;
         identity.c = 0.0;
         identity.d = 1.0;
         identity.x = 0.0;
         identity.y = 0.0;

         m_Count  = 0;
         m_GState = new TGState
         {
            ActiveFont   = IntPtr.Zero,
            CharSpacing  = 0.0f,
            FontSize     = 1.0,
            FontType     = TFontType.ftType1,
            Matrix       = identity,
            TextDrawMode = TDrawMode.dmNormal,
            TextScale    = 100.0f,
            WordSpacing  = 0.0f
         };
      }

      // Draws a line under every text run of the given records so that the computed text
      // coordinates can be checked visually. Consecutive calls alternate between blue and red.
      //
      // Matrix  - text matrix of the records; it is concatenated with the current transformation matrix.
      // Source  - records with the raw, non-translated strings; used when no CID font is active.
      // Kerning - records whose Advance and Width values are used when a CID font (ftType0) is active.
      // Count   - number of records to process.
      // Width   - not used by this implementation.
      // Decoded - if false, nothing is drawn and 0 is returned.
      //
      // Returns 0 on success and -1 if StrokePath() fails or an exception occurs.
      public int MarkCoordinates(TCTM Matrix, TTextRecordA[] Source, TTextRecordW[] Kerning, int Count, double Width, bool Decoded)
      {
         if (!Decoded) return 0;
         try
         {
            /*
               Note that we write lines to the page while it is being parsed. This is critical because the parser
               doesn't notice when a fatal error occurs, e.g. out of memory. We must make sure that processing
               stops immediately in such a case. To achieve this we check the return value of StrokePath() since
               the only reason why this function can fail is out of memory.
            */
            int i;
            double x1 = 0.0, y1 = 0.0, x2, y2, textWidth = 0.0;

            // Transform the text matrix to user space
            TCTM m = MulMatrix(m_GState.Matrix, Matrix);
            Transform(m, ref x1, ref y1); // Start point of the text record

            /*
               This code draws lines under each text record of a PDF file to check whether the coordinates are correct.
               It also shows how word spacing must be handled. You need an algorithm like this one if you want to
               develop a text extraction algorithm that tries to preserve the original text layout. Note that word
               spacing must be ignored when a CID font is selected. In addition, word spacing is applied to the space
               character (32) of the non-translated source string only. The Unicode string cannot be used to determine
               whether word spacing must be applied because the character can be encoded to an arbitrary Unicode character.
            */
            if (m_GState.FontType == TFontType.ftType0)
            {
               // Word spacing must be ignored if a CID font is selected!
               for (i = 0; i < Count; i++)
               {
                  if (Kerning[i].Advance != 0.0f)
                  {
                     textWidth -= Kerning[i].Advance;
                     x1 = textWidth;
                     y1 = 0.0;
                     Transform(m, ref x1, ref y1);
                  }
                  textWidth += Kerning[i].Width;
                  x2 = textWidth;
                  y2 = 0.0;
                  Transform(m, ref x2, ref y2);

                  if (!StrokeSegment(x1, y1, x2, y2)) return -1;
                  x1 = x2;
                  y1 = y2;
               }
            }else
            {
               // This code draws lines under line segments which are separated by one or more space characters. This is
               // important to handle word spacing correctly. The same code can be used to compute word boundaries of
               // Ansi strings.
               for (i = 0; i < Count; i++)
               {
                  if (Source[i].Advance != 0.0f)
                  {
                     textWidth -= Source[i].Advance;
                     x1 = textWidth;
                     y1 = 0.0;
                     Transform(m, ref x1, ref y1);
                  }

                  int len = Source[i].Length;
                  // An empty record has no run to draw; the start point computed above stays valid.
                  if (len == 0) continue;

                  // The scan must run over the raw bytes. A string produced by Marshal.PtrToStringAnsi() can have
                  // fewer or more characters than the buffer has bytes (multi-byte ANSI code pages, UTF-8 on
                  // non-Windows runtimes), so character indices are not usable as byte offsets into the buffer.
                  byte[] src = new byte[len];
                  Marshal.Copy(Source[i].Text, src, 0, len);

                  long ptr  = (long)Source[i].Text;
                  int  j    = 0;
                  int  last = 0;
                  while (j < len)
                  {
                     if (src[j] != 32)
                     {
                        ++j;
                        continue;
                     }
                     if (j > last)
                     {
                        // Run of non-space bytes in front of the space. Note that the text must be taken
                        // from the Source array!
                        textWidth += MeasureRun(ptr, last, j - last);
                        x2 = textWidth;
                        y2 = 0.0;
                        Transform(m, ref x2, ref y2);
                        if (!StrokeSegment(x1, y1, x2, y2)) return -1;
                     }
                     // Skip all consecutive spaces and advance by their width
                     last = j++;
                     while (j < len && src[j] == 32)
                     {
                        ++j;
                     }
                     textWidth += MeasureRun(ptr, last, j - last);
                     last = j;
                     // The next segment starts behind the spaces
                     x1 = textWidth;
                     y1 = 0.0;
                     Transform(m, ref x1, ref y1);
                  }
                  if (j > last)
                  {
                     textWidth += MeasureRun(ptr, last, j - last);
                     x2 = textWidth;
                     y2 = 0.0;
                     Transform(m, ref x2, ref y2);
                     if (!StrokeSegment(x1, y1, x2, y2)) return -1;
                     // Only a segment that was really drawn may become the start point of the next record.
                     // If the record ended with spaces, x1/y1 already point behind them.
                     x1 = x2;
                     y1 = y2;
                  }
               }
            }
            ++m_Count;
            return 0;
         }catch
         {
            // Any failure is reported through the return value instead of an exception.
            return -1;
         }
      }

      // Returns the width of count bytes of text that start offset bytes behind the address text,
      // measured with the active font and the current character spacing, word spacing, and text scale.
      private double MeasureRun(long text, int offset, int count)
      {
         return m_PDF.GetTextWidth( m_GState.ActiveFont,
                                    new IntPtr(text + offset),
                                    count,
                                    m_GState.CharSpacing,
                                    m_GState.WordSpacing,
                                    m_GState.TextScale);
      }

      // Strokes a line from (x1, y1) to (x2, y2); red when m_Count is odd, blue otherwise.
      // Returns the result of StrokePath().
      private bool StrokeSegment(double x1, double y1, double x2, double y2)
      {
         m_PDF.MoveTo(x1, y1);
         m_PDF.LineTo(x2, y2);
         if ((m_Count & 1) != 0)
            m_PDF.SetStrokeColor(CPDF.PDF_RED);
         else
            m_PDF.SetStrokeColor(CPDF.PDF_BLUE);
         return m_PDF.StrokePath();
      }

      // Concatenates Matrix with the current transformation matrix and stores the
      // result as the new current transformation matrix.
      public void MulMatrix(TCTM Matrix)
      {
         m_GState.Matrix = MulMatrix(m_GState.Matrix, Matrix);
      }

      // Returns the concatenation of the two matrices. A point transformed with the
      // result is transformed by M2 first and by M1 afterwards.
      private TCTM MulMatrix(TCTM M1, TCTM M2)
      {
         return new TCTM
         {
            // Linear part
            a = M2.a * M1.a + M2.b * M1.c,
            b = M2.a * M1.b + M2.b * M1.d,
            c = M2.c * M1.a + M2.d * M1.c,
            d = M2.c * M1.b + M2.d * M1.d,
            // Translation of M2 mapped through M1, plus the translation of M1
            x = M2.x * M1.a + M2.y * M1.c + M1.x,
            y = M2.x * M1.b + M2.y * M1.d + M1.y
         };
      }

      // Replaces the current graphics state with the most recently saved one.
      // Returns false, leaving the current state unchanged, if no saved state exists.
      public bool RestoreGState()
      {
         return m_Stack.Restore(ref m_GState);
      }

      // Pushes a copy of the current graphics state onto the save stack.
      // Returns 0 on success and -1 if the stack could not be enlarged.
      public int SaveGState()
      {
         return m_Stack.Save(ref m_GState);
      }

      // Stores the character spacing in the current graphics state. The value is narrowed to float.
      public void SetCharSpacing(double Value)
      {
         m_GState.CharSpacing = (float)Value;
      }

      // Stores the active font in the current graphics state.
      //
      // Font     - font handle; later passed to CPDF.GetTextWidth() by MarkCoordinates().
      // FontSize - font size; stored only.
      // Type     - font type; ftType0 makes MarkCoordinates() use its CID font code path.
      public void SetFont(IntPtr Font, double FontSize, TFontType Type)
      {
         m_GState.ActiveFont = Font;
         m_GState.FontSize   = FontSize;
         m_GState.FontType   = Type;
      }

      // Stores the text draw mode in the current graphics state. The value is recorded only;
      // nothing in this class reads it.
      public void SetTextDrawMode(TDrawMode Mode)
      {
         m_GState.TextDrawMode = Mode;
      }

      // Stores the text scaling in the current graphics state. The value is narrowed to float;
      // the initial value is 100.0f.
      public void SetTextScale(double Value)
      {
         m_GState.TextScale = (float)Value;
      }

      // Stores the word spacing in the current graphics state. The value is narrowed to float.
      public void SetWordSpacing(double Value)
      {
         m_GState.WordSpacing = (float)Value;
      }

      // Maps the point (x, y) through the matrix M in place.
      private void Transform(TCTM M, ref double x, ref double y)
      {
         // Capture both inputs first because each output depends on both of them
         double srcX = x;
         double srcY = y;
         x = srcX * M.a + srcY * M.c + M.x;
         y = srcX * M.b + srcY * M.d + M.y;
      }

      // Number of MarkCoordinates() calls that completed successfully since construction or the
      // last Init(); its lowest bit selects the stroke color (red if set, blue otherwise).
      protected int      m_Count;
      // Current graphics state; saved to and restored from m_Stack.
      protected TGState  m_GState;
      // PDF instance passed to the constructor; used to draw the lines and to measure text.
      internal  CPDF     m_PDF;
      // Stack of saved graphics states.
      protected CStack   m_Stack;
   }
}
